import requests
import pandas
import time
from fake_useragent import UserAgent

ua = UserAgent()
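# Crawls Python job listings from Lagou's positionAjax.json endpoint page
# by page and appends each posting as one CSV row. fake_useragent supplies
# a random browser User-Agent per request so the crawler looks less like a
# script.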


def getUrl(page):
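    # Lagou rejects bare POSTs to this endpoint: it checks for a
    # browser-like User-Agent, a matching Referer and a session Cookie.
    # The Cookie below is a captured sample and typically expires, so it
    # may need to be refreshed from a real browser session.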
    headers = {
        'Host': 'www.lagou.com',
        'User-Agent': ua.random,
        'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
        'Cookie': '_ga=GA1.2.835643970.1550115033; user_trace_token=20190214113018-d9f37927-3008-11e9-81d3-5254005c3644; LGUID=20190214113018-d9f37c5f-3008-11e9-81d3-5254005c3644; index_location_city=%E5%85%A8%E5%9B%BD; JSESSIONID=ABAAABAAAGFABEFE45F3B5BA01B40175934669B0BC43A26; _gat=1; LGSID=20190228163241-69f3f645-3b33-11e9-885b-5254005c3644; PRE_UTM=; PRE_HOST=www.baidu.com; PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D9ZsceuLKRzhM6l4-oglzCPMoctir24Cy1CPJc8lBqx7%26wd%3D%26eqid%3Db093fc1c0002256c000000035c779ca5; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; TG-TRACK-CODE=index_search; LGRID=20190228163250-6ed76cbb-3b33-11e9-885b-5254005c3644; SEARCH_ID=701912755f6141f7b794b9daac01a787'
        }
    print('Fetching page %s' % page)
    data = {
        # Lagou's search form sends first=true only for the first page.
        'first': 'true' if page == 1 else 'false',
        'pn': page,
        'kd': 'python'
        }
    # proxies = {
    #     'https': 'https://61.8.78.130:8080'
    # }
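    # If Lagou starts blocking the IP, a proxy like the commented sample
    # above can be enabled by passing proxies=proxies to requests.post.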
    url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
    response = requests.post(url, data=data, headers=headers)
    # The endpoint answers with JSON; the job list lives under
    # content -> positionResult -> result. When Lagou decides the requests
    # are too frequent it returns an error payload without 'content', so a
    # KeyError here usually means the cookie or rate limit needs attention.
    return response.json()['content']['positionResult']['result']


def main():
    for page in range(1, 30):
        jobs_dcts = getUrl(page)
        for jobs in jobs_dcts:
            # One tuple per posting; linestaion and positionLables are
            # spelled exactly as the API returns them.
            i = [(jobs['positionAdvantage'], jobs['city'], jobs['imState'],
                  jobs['companyShortName'], jobs['industryField'],
                  jobs['workYear'], jobs['salary'], jobs['education'],
                  jobs['secondType'], jobs['jobNature'], jobs['positionLables'],
                  jobs['companySize'], jobs['linestaion'], jobs['firstType'],
                  jobs['createTime'], jobs['positionName'],
                  jobs['companyLabelList'],
                  jobs['companyFullName'])]
            print(i)
            # Append one row per posting; header and index are omitted so
            # the file keeps growing cleanly across pages.
            data = pandas.DataFrame(i)
            data.to_csv(r'jobs.csv', header=False, index=False, mode='a+')
        # Throttle between pages to avoid tripping Lagou's rate limiting.
        time.sleep(3)

if __name__ == '__main__':
    main()
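# A minimal sketch (not part of the crawl) for loading jobs.csv back with
# named columns; the names mirror the tuple order written in main():
#
# df = pandas.read_csv('jobs.csv', header=None, names=[
#     'positionAdvantage', 'city', 'imState', 'companyShortName',
#     'industryField', 'workYear', 'salary', 'education', 'secondType',
#     'jobNature', 'positionLables', 'companySize', 'linestaion',
#     'firstType', 'createTime', 'positionName', 'companyLabelList',
#     'companyFullName'])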
